package com.lyfx.reptiles.utils;

import com.lyfx.reptiles.pojo.Content;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Component;

import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;

/**
 * Fetches a JD.com search result page and extracts the listed products from its HTML.
 *
 * <p>Example: {@code new HtmlParseUtil().parseJd("java", 1)} returns the products found on the
 * first result page for the keyword "java".
 *
 * @author lsy
 * @create 2020-07-20-16:46
 */
@Component
public class HtmlParseUtil {

    /** Address of the JD search endpoint; the query string is appended per request. */
    private static final String SEARCH_URL = "https://search.jd.com/Search";

    /** Timeout, in milliseconds, passed to jsoup when fetching the search page. */
    private static final int TIMEOUT_MILLIS = 30000;

    /** CSS selector matching one {@code <li>} per product in the result list. */
    private static final String ITEM_SELECTOR = "#J_goodsList>ul>li";

    /**
     * Attributes consulted for the image URL when {@code src} is empty.
     * {@code source-data-lazy-img} is the attribute named by the original author's note;
     * NOTE(review): {@code data-lazy-img} is presumed to be an alternative spelling used by the
     * page for lazy-loaded images -- confirm against the current markup.
     */
    private static final String[] LAZY_IMG_ATTRS = {"data-lazy-img", "source-data-lazy-img"};

    /**
     * Downloads one page of search results for the given keyword and converts every product
     * entry into a {@link Content}.
     *
     * @param keyWords search keyword; it is URL-encoded as UTF-8 before being sent, so
     *                 non-ASCII text, spaces and reserved characters such as {@code &} are safe
     * @param page     result page number to request; {@code null} is treated as page 1
     * @return a list with one {@link Content} (title, price, image URL) per product entry
     *         found; empty when the page contains no matching entries
     * @throws IllegalArgumentException if {@code keyWords} is {@code null}
     * @throws Exception                if the URL cannot be built or the page cannot be fetched
     */
    public List<Content> parseJd(String keyWords, Integer page) throws Exception {
        if (keyWords == null) {
            throw new IllegalArgumentException("keyWords must not be null");
        }
        // Avoid sending the literal text "null" as the page number.
        int pageNumber = (page == null) ? 1 : page;

        // The keyword must be percent-encoded; otherwise characters such as '&', '#', spaces
        // or non-ASCII text would corrupt the query string.
        String url = SEARCH_URL
                + "?keyword=" + URLEncoder.encode(keyWords, "UTF-8")
                + "&page=" + pageNumber
                + "&enc=utf-8";

        Document document = Jsoup.parse(new URL(url), TIMEOUT_MILLIS);
        Elements items = document.select(ITEM_SELECTOR);

        List<Content> results = new ArrayList<>(items.size());
        for (Element item : items) {
            String img = resolveImageUrl(item);
            String price = item.getElementsByClass("p-price").eq(0).text();
            String title = item.getElementsByClass("p-name").eq(0).text();
            results.add(new Content(title, price, img));
        }
        return results;
    }

    /**
     * Returns the image URL of the first {@code <img>} inside a product entry, preferring
     * {@code src} and falling back to the lazy-load attributes when {@code src} is empty.
     */
    private static String resolveImageUrl(Element item) {
        Elements firstImage = item.getElementsByTag("img").eq(0);
        String src = firstImage.attr("src");
        if (!src.isEmpty()) {
            return src;
        }
        for (String attribute : LAZY_IMG_ATTRS) {
            String lazy = firstImage.attr(attribute);
            if (!lazy.isEmpty()) {
                return lazy;
            }
        }
        // Nothing found: keep the original behaviour of returning an empty string.
        return src;
    }
}
